import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import plotly.express as px
import datetime
Weekly Updated GDP change estimate by OECD: https://www.oecd.org/economy/weekly-tracker-of-gdp-growth/
The variable Change is defined as the percent change of last week's GDP compared to the same week of the pre-pandemic year. It can be conceptually equivalent to the following, although actual process of calculation is a bit more complicated:
\begin{align*} y_{i,0} &= \textrm{Weekly GDP of the week i of THIS year, measured at PRE-pandemic price level}\\ y_{i,pre} &= \textrm{Weekly GDP of the week i of PRE-pandemic year}\\ \textrm{Change} &= \frac{y_{i,0} - y_{i,pre}}{y_{i,pre}} \quad (\textrm{in percentage}) \end{align*}Under the hood, the OECD tracker is trained with the real-GDP changes, and thus it returns the real change, not the nominal change.
dynamic_data_link = \
"https://github.com/NicolasWoloszko/OECD-Weekly-Tracker/raw/main/Data/weekly_tracker.xlsx"
df_weekly_raw = pd.read_excel(dynamic_data_link)
Here are the list of countries being tracked and estimated. However, the frequency of the tracking is not the same for all countries.
df_weekly_raw['region'].unique()
array(['Argentina', 'Australia', 'Austria', 'Belgium', 'Brazil',
'Bulgaria', 'Canada', 'Chile', 'Colombia', 'Costa Rica',
'Czech Republic', 'Denmark', 'Estonia', 'Finland', 'France',
'Germany', 'Greece', 'Hungary', 'Iceland', 'India', 'Indonesia',
'Ireland', 'Israel', 'Italy', 'Japan', 'Korea', 'Latvia',
'Lithuania', 'Luxembourg', 'Mexico', 'Netherlands', 'New Zealand',
'Norway', 'Poland', 'Portugal', 'Romania', 'Russia',
'Slovak Republic', 'Slovenia', 'South Africa', 'Spain', 'Sweden',
'Switzerland', 'Turkey', 'United Kingdom', 'United States'],
dtype=object)
A helper function to return the n_sundays number of date strings of the beginning of the most recent weeks for which the data is available.
def get_past_n_sundays(n_sundays = 3):
date_today = datetime.datetime.now()
days_delta_list = [1 + 7*(i+1) for i in range(n_sundays)]
date_sunday_list = [(date_today -
datetime.timedelta(
days = (each_delta + date_today.weekday())
)
)
for each_delta
in days_delta_list]
sunday_string_list = [each_sunday.strftime("%Y-%m-%d")
for each_sunday
in date_sunday_list]
return sunday_string_list
get_past_n_sundays(n_sundays = 2)
['2021-11-14', '2021-11-07']
We have to filter only the relevant portion of the original raw excel file, which is huge.
# G7 countries
countries_g7 = ['Canada', 'France', 'Germany','Italy',
'Japan','United Kingdom','United States']
weeks = get_past_n_sundays(n_sundays = 2)
def dynamic_data_filter(countries_list, weeks_list):
df_weekly = df_weekly_raw[['region', 'date', 'Tracker (yoy)']].copy()
df_weekly = df_weekly.rename(columns = {"region": "Country",
"date": "Week",
"Tracker (yoy)": "GDP_Change"})
df_weekly = df_weekly[df_weekly["Week"] > '2021-10-01']
df_weekly["Week"] = df_weekly["Week"].astype(str)
df_weekly = df_weekly[df_weekly["Country"].apply(lambda x: x in countries_list)]
df_weekly = df_weekly[df_weekly["Week"].apply(lambda x: x in weeks_list)]
df_weekly = df_weekly.reset_index(drop = True)
return df_weekly
dynamic_data_filter(countries_g7, weeks)
| Country | Week | GDP_Change | |
|---|---|---|---|
| 0 | Canada | 2021-11-07 | 6.030735 |
| 1 | Canada | 2021-11-14 | 8.223468 |
| 2 | France | 2021-11-07 | 12.183032 |
| 3 | France | 2021-11-14 | 13.139209 |
| 4 | Germany | 2021-11-07 | 5.859079 |
| 5 | Germany | 2021-11-14 | 6.657616 |
| 6 | Italy | 2021-11-07 | 13.386478 |
| 7 | Italy | 2021-11-14 | 14.386791 |
| 8 | Japan | 2021-11-07 | 2.303170 |
| 9 | Japan | 2021-11-14 | 3.069305 |
| 10 | United Kingdom | 2021-11-07 | 11.878025 |
| 11 | United Kingdom | 2021-11-14 | 12.121530 |
| 12 | United States | 2021-11-07 | 7.166315 |
| 13 | United States | 2021-11-14 | 7.631384 |
The collected data on greenhouse gas emission is in the unit of CO2 equivalent tonnes during the years from 2015 to 2019, which are the past five years prior to the pandemic. The data is from the OECD: https://stats.oecd.org/Index.aspx?DataSetCode=AIR_GHG
Ideally, we would like to use the values over many years to remove random fluctuations. However, the data from too many years ago would not be relevant. We thought the past five pre-pandemic years would be a good trade-off.
countries_g7
['Canada', 'France', 'Germany', 'Italy', 'Japan', 'United Kingdom', 'United States']
df_ghg_raw = pd.read_csv("../data/AIR_GHG_25112021005547428.csv")
def ghg_filter(countries_list):
years = [2015, 2016, 2017, 2018, 2019]
df_ghg = df_ghg_raw.copy()
df_ghg = df_ghg[df_ghg["Variable"] == 'Total emissions excluding LULUCF']
df_ghg = df_ghg[df_ghg["Unit"] == 'Tonnes of CO2 equivalent']
df_ghg = df_ghg[df_ghg["Pollutant"] == 'Greenhouse gases']
df_ghg = df_ghg[df_ghg["Year"].apply(lambda x: x in years)]
df_ghg = df_ghg[df_ghg["Country"].apply(lambda x: x in countries_list)]
df_ghg = df_ghg[["Country", "Year", "Value"]]
df_ghg = df_ghg.rename(columns = {"Value":"GHG"})
df_ghg = df_ghg.reset_index(drop = True)
return df_ghg
ghg_filter(countries_g7)
| Country | Year | GHG | |
|---|---|---|---|
| 0 | Canada | 2015 | 723096.018 |
| 1 | Canada | 2016 | 706933.908 |
| 2 | Canada | 2017 | 716092.013 |
| 3 | Canada | 2018 | 728475.890 |
| 4 | Canada | 2019 | 730244.943 |
| 5 | France | 2015 | 464205.673 |
| 6 | France | 2016 | 467059.995 |
| 7 | France | 2017 | 470564.316 |
| 8 | France | 2018 | 452034.456 |
| 9 | France | 2019 | 442984.668 |
| 10 | Germany | 2015 | 904261.811 |
| 11 | Germany | 2016 | 907967.906 |
| 12 | Germany | 2017 | 892075.667 |
| 13 | Germany | 2018 | 855890.413 |
| 14 | Germany | 2019 | 809798.537 |
| 15 | Italy | 2015 | 440436.701 |
| 16 | Italy | 2016 | 437696.091 |
| 17 | Italy | 2017 | 432713.736 |
| 18 | Italy | 2018 | 428549.346 |
| 19 | Italy | 2019 | 418280.596 |
| 20 | Japan | 2015 | 1318516.697 |
| 21 | Japan | 2016 | 1301855.892 |
| 22 | Japan | 2017 | 1288528.719 |
| 23 | Japan | 2018 | 1245020.887 |
| 24 | Japan | 2019 | 1209493.354 |
| 25 | United Kingdom | 2015 | 509050.718 |
| 26 | United Kingdom | 2016 | 484146.195 |
| 27 | United Kingdom | 2017 | 473389.976 |
| 28 | United Kingdom | 2018 | 466666.983 |
| 29 | United Kingdom | 2019 | 453101.458 |
| 30 | United States | 2015 | 6671112.065 |
| 31 | United States | 2016 | 6520337.839 |
| 32 | United States | 2017 | 6483291.331 |
| 33 | United States | 2018 | 6671449.401 |
| 34 | United States | 2019 | 6558345.179 |
The real GDP for each countries during the same set of years is collected. The data is from the IMF: https://www.imf.org/external/datamapper/NGDPD@WEO/OEMDC/ADVEC/WEOWORLD
This is an inflation-adjusted measure of GDP, which is different from non-adjusted measure of nominal GDP.

df_gdp_raw = pd.read_excel("../data/imf real gdp.xlsx")
def gdp_filter(countries_list):
years = [2015, 2016, 2017, 2018, 2019]
df_gdp = pd.melt(df_gdp_raw,
id_vars = 'GDP, current prices (Billions of U.S. dollars)',
var_name = "Year",
value_name = "GDP")
df_gdp = df_gdp.rename(columns = {'GDP, current prices (Billions of U.S. dollars)':"Country"})
df_gdp = df_gdp[df_gdp["Year"].apply(lambda x: x in years)]
df_gdp = df_gdp[df_gdp["Country"].apply(lambda x: x in countries_list)]
df_gdp["GDP"] = df_gdp["GDP"].astype(float)
df_gdp = df_gdp.reset_index(drop = True)
return df_gdp
Here is a model that predicts the greenhouse gas emission from the GDP for each country.
countries_g7
['Canada', 'France', 'Germany', 'Italy', 'Japan', 'United Kingdom', 'United States']
df_static = ghg_filter(countries_g7)
df_static = df_static.sort_values(by = ["Country", "Year"],
ignore_index = True)
df_gdp = gdp_filter(countries_g7)
df_gdp = df_gdp.sort_values(by = ["Country", "Year"],
ignore_index = True)
#display(pd.concat([df_static, df_gdp], axis = 1))
df_static["GDP"] = df_gdp["GDP"]
df_static
| Country | Year | GHG | GDP | |
|---|---|---|---|---|
| 0 | Canada | 2015 | 723096.018 | 1556.508 |
| 1 | Canada | 2016 | 706933.908 | 1527.996 |
| 2 | Canada | 2017 | 716092.013 | 1649.266 |
| 3 | Canada | 2018 | 728475.890 | 1721.822 |
| 4 | Canada | 2019 | 730244.943 | 1741.576 |
| 5 | France | 2015 | 464205.673 | 2438.208 |
| 6 | France | 2016 | 467059.995 | 2471.261 |
| 7 | France | 2017 | 470564.316 | 2589.031 |
| 8 | France | 2018 | 452034.456 | 2789.680 |
| 9 | France | 2019 | 442984.668 | 2728.834 |
| 10 | Germany | 2015 | 904261.811 | 3357.926 |
| 11 | Germany | 2016 | 907967.906 | 3468.896 |
| 12 | Germany | 2017 | 892075.667 | 3689.547 |
| 13 | Germany | 2018 | 855890.413 | 3979.093 |
| 14 | Germany | 2019 | 809798.537 | 3888.756 |
| 15 | Italy | 2015 | 440436.701 | 1836.824 |
| 16 | Italy | 2016 | 437696.091 | 1876.554 |
| 17 | Italy | 2017 | 432713.736 | 1961.104 |
| 18 | Italy | 2018 | 428549.346 | 2093.088 |
| 19 | Italy | 2019 | 418280.596 | 2005.135 |
| 20 | Japan | 2015 | 1318516.697 | 4444.931 |
| 21 | Japan | 2016 | 1301855.892 | 5003.678 |
| 22 | Japan | 2017 | 1288528.719 | 4930.837 |
| 23 | Japan | 2018 | 1245020.887 | 5036.892 |
| 24 | Japan | 2019 | 1209493.354 | 5135.896 |
| 25 | United Kingdom | 2015 | 509050.718 | 2933.433 |
| 26 | United Kingdom | 2016 | 484146.195 | 2703.242 |
| 27 | United Kingdom | 2017 | 473389.976 | 2664.705 |
| 28 | United Kingdom | 2018 | 466666.983 | 2860.982 |
| 29 | United Kingdom | 2019 | 453101.458 | 2833.301 |
| 30 | United States | 2015 | 6671112.065 | 18206.025 |
| 31 | United States | 2016 | 6520337.839 | 18695.100 |
| 32 | United States | 2017 | 6483291.331 | 19479.625 |
| 33 | United States | 2018 | 6671449.401 | 20527.150 |
| 34 | United States | 2019 | 6558345.179 | 21372.600 |
fig01 = px.scatter(df_static,
x = "GDP",
y = "GHG",
color = "Country",
hover_data = ["Year"],
log_x = True,
log_y = True,
title = "GH Emission vs GDP",
labels={'GDP':'GDP, Billion USD',
'GHG':'GHG, Tonnes of CO2 Equivalent'},
height = 500,
width = 700
)
fig01.show()
df_static["log(GHG)"] = np.log(df_static["GHG"])
df_static["log(GDP)"] = np.log(df_static["GDP"])
from sklearn.linear_model import LinearRegression
coef_list = []
for each_country in countries_g7:
df_linreg = df_static[df_static["Country"] == each_country]
y = df_linreg["log(GHG)"].to_numpy()
X = df_linreg["log(GDP)"].to_numpy()
X = X[:, np.newaxis]
linreg = LinearRegression()
linreg.fit(X, y)
linreg.coef_
coef_list.append(linreg.coef_[0])
coef_list
[0.17989018168755153, -0.32148987241070526, -0.532405973950238, -0.2862370671505205, -0.45534032614830783, 0.2788329490702167, -0.00811169697189959]
df_predict = df_static[df_static["Year"] == 2019]
df_predict = df_predict[["Country", "GHG"]]
df_predict["coef"] = np.array(coef_list)
df_predict = df_predict.set_index(keys = "Country")
df_predict
| GHG | coef | |
|---|---|---|
| Country | ||
| Canada | 730244.943 | 0.179890 |
| France | 442984.668 | -0.321490 |
| Germany | 809798.537 | -0.532406 |
| Italy | 418280.596 | -0.286237 |
| Japan | 1209493.354 | -0.455340 |
| United Kingdom | 453101.458 | 0.278833 |
| United States | 6558345.179 | -0.008112 |
def predict_weekly_emission(country_name, change):
amount = df_predict.loc[country_name, "GHG"]
amount_week = amount*7/365
change_gh = change*df_predict.loc[country_name, "coef"]
amount_week = amount_week*(1 + change_gh/100)
return amount_week, change_gh
predict_weekly_emission("France", 5.6)
(8342.64647374541, -1.8003432854999493)
df_weekly = dynamic_data_filter(countries_g7, get_past_n_sundays())
df_weekly["GHG_weekly"] = 0
df_weekly["GHG_Change"] = 0
for index, row in df_weekly.iterrows():
amount_week, change_gh = predict_weekly_emission(row["Country"],
row["GDP_Change"])
df_weekly.loc[index, "GHG_weekly"] = amount_week
df_weekly.loc[index, "GHG_Change"] = change_gh
df_weekly
| Country | Week | GDP_Change | GHG_weekly | GHG_Change | |
|---|---|---|---|---|---|
| 0 | Canada | 2021-10-31 | 5.790923 | 14150.588712 | 1.041730 |
| 1 | Canada | 2021-11-07 | 6.030735 | 14156.630313 | 1.084870 |
| 2 | Canada | 2021-11-14 | 8.223468 | 14211.871979 | 1.479321 |
| 3 | France | 2021-10-31 | 12.360755 | 8157.993462 | -3.973858 |
| 4 | France | 2021-11-07 | 12.183032 | 8162.847541 | -3.916721 |
| 5 | France | 2021-11-14 | 13.139209 | 8136.731978 | -4.224122 |
| 6 | Germany | 2021-10-31 | 7.713478 | 14892.596392 | -4.106702 |
| 7 | Germany | 2021-11-07 | 5.859079 | 15045.926783 | -3.119409 |
| 8 | Germany | 2021-11-14 | 6.657616 | 14979.900005 | -3.544555 |
| 9 | Italy | 2021-10-31 | 11.119258 | 7766.505671 | -3.182744 |
| 10 | Italy | 2021-11-07 | 13.386478 | 7714.447083 | -3.831706 |
| 11 | Italy | 2021-11-14 | 14.386791 | 7691.478482 | -4.118033 |
| 12 | Japan | 2021-10-31 | 1.401660 | 23047.720057 | -0.638232 |
| 13 | Japan | 2021-11-07 | 2.303170 | 22952.502889 | -1.048726 |
| 14 | Japan | 2021-11-14 | 3.069305 | 22871.583968 | -1.397578 |
| 15 | United Kingdom | 2021-10-31 | 10.315507 | 8939.556735 | 2.876303 |
| 16 | United Kingdom | 2021-11-07 | 11.878025 | 8977.415781 | 3.311985 |
| 17 | United Kingdom | 2021-11-14 | 12.121530 | 8983.315808 | 3.379882 |
| 18 | United States | 2021-10-31 | 6.198364 | 125713.243411 | -0.050279 |
| 19 | United States | 2021-11-07 | 7.166315 | 125703.367788 | -0.058131 |
| 20 | United States | 2021-11-14 | 7.631384 | 125698.622876 | -0.061903 |
fig02 = px.bar(df_weekly,
x = "Week",
y = "GHG_weekly",
color = "Country",
title = "Weekly GH Emission Prediction",
labels={'GHG_weekly':'GHG, Tonnes of CO2 Equivalent'},
width = 700,
height = 500
)
fig02.update_layout(xaxis_type='category')
fig02.show()
fig03 = px.line(df_weekly,
x = "Week",
y = "GHG_Change",
color = "Country",
title = "Weekly GH Emission Change",
labels={'GHG_Change':'Percent YoY change in Emission'},
width = 700,
height = 500
)
fig03.update_layout(xaxis_type='category')
fig03.show()